Chunk 1: Load packages and set working directory
Un_Inf <- Read10X('/Users/aj397/Documents/Data/HuLu 10X 2023/N1468/filtered_feature_bc_matrix')
IAV_Inf <- Read10X('/Users/aj397/Documents/Data/HuLu 10X 2023/N1469/filtered_feature_bc_matrix')
Un_Inf <- CreateSeuratObject(counts = Un_Inf, project = "Un_Inf", min.cells = 1, min.features = 1)
Un_Inf[["percent.mt"]] <- PercentageFeatureSet(object = Un_Inf, pattern = "^MT-")
sce1 <- as.SingleCellExperiment(Un_Inf)
## Warning: Layer 'data' is empty
## Warning: Layer 'scale.data' is empty
#This step runs doublet detection and puts a probability of being a doublent in the metadata as "scDblFinder.score"
sce1 <- scDblFinder(sce1)
## Creating ~2362 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 250 cells excluded from training.
## iter=1, 262 cells excluded from training.
## iter=2, 254 cells excluded from training.
## Threshold found:0.465
## 155 (5.3%) doublets called
Un_Inf[['scDblFinder.score']]<-sce1$scDblFinder.score
Un_Inf[['scDblFinder.class']]<-sce1$scDblFinder.class
cutoff <- sum(quantile(Un_Inf@meta.data$nFeature_RNA, probs = c(0.75)), iqr(Un_Inf@meta.data$nFeature_RNA)*1.5)
Un_Inf <- subset(Un_Inf, subset = nFeature_RNA < cutoff)
rm(sce1)
# Chunk 3: Read and preprocess IAV infected sample
IAV_Inf <- CreateSeuratObject(counts = IAV_Inf, project = "IAV_Inf", min.cells = 1, min.features = 1)
DotPlot(IAV_Inf, assay = "RNA", features = "fluPB1", )
## Warning: No layers found matching search pattern provided
## Warning in FetchData.Assay5(object = object[[DefaultAssay(object = object)]], :
## data layer is not found and counts layer is used
## Warning: Only one identity present, the expression values will be not scaled

IAV_Inf[["percent.mt"]] <- PercentageFeatureSet(object = IAV_Inf, pattern = "^MT-")
sce1 <- as.SingleCellExperiment(IAV_Inf)
## Warning: Layer 'data' is empty
## Warning: Layer 'scale.data' is empty
sce1 <- scDblFinder(sce1)
## Creating ~1792 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 186 cells excluded from training.
## iter=1, 198 cells excluded from training.
## iter=2, 194 cells excluded from training.
## Threshold found:0.665
## 117 (5.2%) doublets called
IAV_Inf[['scDblFinder.score']]<-sce1$scDblFinder.score
IAV_Inf[['scDblFinder.class']]<-sce1$scDblFinder.class
cutoff <- sum(quantile(IAV_Inf@meta.data$nFeature_RNA, probs = c(0.75)), iqr(IAV_Inf@meta.data$nFeature_RNA)*1.5)
IAV_Inf <- subset(IAV_Inf, subset = nFeature_RNA < cutoff)
rm(sce1)
# Chunk 4: Merge and QC metrics
HuLu <- merge(x=Un_Inf, y=c(IAV_Inf))
HuLu[["percent.ribo"]] <- PercentageFeatureSet(object = HuLu, pattern = "^RP[SL]")
HuLu[['percent.HB']]<- PercentageFeatureSet(object = HuLu, pattern = "^HB[^(P)]")
HuLu[['log10GenesPerUMI']] <- log10(HuLu$nFeature_RNA) / log10(HuLu$nCount_RNA)
s.genes <- cc.genes.updated.2019$s.genes
g2m.genes <- cc.genes.updated.2019$g2m.genes
VlnPlot(object = HuLu, features = c("nFeature_RNA", "nCount_RNA",'log10GenesPerUMI', "percent.mt", "percent.ribo", "scDblFinder.score"),ncol = 3, group.by = "orig.ident", pt.size = 0.01)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

HuLu <- subset(x = HuLu, subset = nFeature_RNA > 350 & nCount_RNA > 600 & percent.mt < 25 & log10GenesPerUMI >0.7)
# Chunk 5: Normalization, variable features, PCA
HuLu <- NormalizeData(HuLu) %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA()
## Normalizing layer: counts.Un_Inf
## Normalizing layer: counts.IAV_Inf
## Finding variable features for layer counts.Un_Inf
## Finding variable features for layer counts.IAV_Inf
## Centering and scaling data matrix
## PC_ 1
## Positive: CD69, IL7R, RGS1, CCL5, CLEC2B, KLRB1, CCL4, TRBC1, MT-CO3, DDIT4
## NKG7, GNLY, GZMB, CD8A, PRF1, MT-CYB, CREM, GZMA, ICOS, CD40LG
## CTLA4, CMC1, CXCR6, GIMAP7, KLRC1, TRGC2, LINC02446, LY9, MALAT1, TRGC1
## Negative: SDC4, GPRC5A, ELF3, CLDN4, KRT19, KRT8, TACSTD2, EPCAM, CD9, KRT18
## PRSS8, EMP2, SPINT2, MAL2, WFDC2, TM4SF1, SFTA2, NAPSA, NEDD4L, RNASE1
## EFNA1, IFITM3, MUC1, S100A14, NUPR1, CEBPD, ERRFI1, MGST1, CLDN7, LAMB3
## PC_ 2
## Positive: LYZ, CD163, CD14, VCAN, FCGR2B, THBS1, PLAUR, FCGR2A, AIF1, CLEC7A
## CD86, CXCL8, FCER1G, SLC11A1, TYROBP, MS4A7, EREG, CD68, BASP1, KYNU
## C5AR1, MAFB, TIMP1, VSIG4, EPB41L3, IL1R2, SPI1, S100A8, GPNMB, S100A9
## Negative: ELF3, CLDN4, TACSTD2, KRT8, HOPX, PERP, MAL2, IL7R, WFDC2, KRT18
## LMO7, KRT19, GPRC5A, CLDN3, EPCAM, SFTPB, CLDN7, PRSS8, TMC5, SFTA2
## NAPSA, KRT7, SLC34A2, MUC1, IFT57, ATP1B1, LAMB3, EHF, DSTN, NKX2-1
## PC_ 3
## Positive: C5orf49, FAM183A, TMEM190, RSPH1, CAPS, C20orf85, PIFO, CCDC170, TSPAN1, CETN2
## CLDN3, C9orf24, FAM81B, ZMYND10, MLF1, MORN2, TPPP3, DNAH11, C1orf194, DNAAF1
## DNALI1, C9orf116, CCDC78, CABCOCO1, ODF3B, TEKT2, FAM92B, MAPK15, MOK, CFAP157
## Negative: NAPSA, SFTPA2, SFTPB, SFTPA1, PGC, SLC34A2, SFTA2, SFTPD, PEBP4, SFTPC
## S100A14, C3, ABCA3, C11orf96, SCD, C2, CXCL17, SLC22A31, CRTAC1, SERPINA1
## MALL, SDR16C5, C4BPA, DRAM1, NRGN, SLPI, ALPL, MPZL2, PRSS8, PDZK1IP1
## PC_ 4
## Positive: SERPINA1, CSF3R, ACSL1, CXCL17, SLPI, S100A9, SFTPA2, NAPSA, PGC, PIGR
## SFTPA1, CXCL8, CTSH, MGST1, SLC34A2, SFTPB, MUC1, SFTPD, NPC2, WFDC2
## TREM1, C2, SLC11A1, IL1R2, C3, SFTPC, BCL2A1, S100A8, ABCA3, TLR2
## Negative: CALCRL, CLDN5, SERPINE1, SPARCL1, DEPP1, PODXL, TIMP3, GPX3, AKAP12, SPARC
## SRPX, EGFL7, TGM2, TCF4, COX7A1, ADGRL4, CRIP2, MGP, CDH5, PLAT
## SLCO4A1, VWF, KANK3, CLIC4, CALD1, PTGIS, PECAM1, PTPRB, GNG11, FLT1
## PC_ 5
## Positive: TFPI, SCD, NRGN, CXCL17, C3, C1R, PIGR, IGFBP4, C2, CRTAC1
## SFTPA2, ABCA3, SLC22A31, ARFGEF3, LRP2, SERPINA1, C11orf96, SFTPA1, CP, FASN
## NNMT, EPHX1, MT-ATP6, SFTPC, SELENOP, FADS1, MT-CO3, PGC, LAMP3, MACROD2
## Negative: AGER, CLDN18, ANKRD1, CRYAB, CEACAM6, KRT7, SCEL, TENT5B, CAV1, EDN1
## UPK3B, GADD45B, GJA1, HSPB8, CGN, FSTL3, EMP2, ANXA3, SCNN1G, SFN
## KLF10, SEMA3B, MYL9, KLK11, NRG1, COL4A1, MYRF, ICAM1, CAV2, TNFRSF12A
# Chunk 6: Cell cycle + Harmony
HuLu <- JoinLayers(HuLu)
HuLu <- CellCycleScoring(HuLu, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
HuLu <- RunHarmony(HuLu, group.by.vars = c("orig.ident", "Phase"))
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony converged after 3 iterations
ElbowPlot(HuLu, ndims = 50, reduction = 'harmony')

# Chunk 7: UMAP + clustering
HuLu <- FindNeighbors(HuLu, reduction = 'harmony', dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
HuLu <- RunUMAP(HuLu, reduction = 'harmony', dims = 1:50)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 21:10:50 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:10:50 Read 3856 rows and found 50 numeric columns
## 21:10:50 Using Annoy for neighbor search, n_neighbors = 30
## 21:10:50 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:10:50 Writing NN index file to temp file /var/folders/8h/nlcjp3m90839bs8hs2n2jvx40000gp/T//RtmpxQZTcx/file12b1535541926
## 21:10:50 Searching Annoy index using 1 thread, search_k = 3000
## 21:10:51 Annoy recall = 100%
## 21:10:51 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 21:10:52 Initializing from normalized Laplacian + noise (using RSpectra)
## 21:10:53 Commencing optimization for 500 epochs, with 167236 positive edges
## 21:10:58 Optimization finished
HuLu <- FindClusters(HuLu, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3856
## Number of edges: 178836
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8454
## Number of communities: 16
## Elapsed time: 0 seconds
DimPlot(HuLu, label = T,
label.box = TRUE,
label.size = 3,
repel = TRUE)

# Chunk 8: Marker analysis + plots
#Defining Clusters
#Cluster 0 = CD4 T cells
FeaturePlot(HuLu,c("CD53", "PTPRC", "CORO1A", "ISG20", "CCL5", "CD40LG", "TNFRSF25",
"CD28", "CD4", "TRAT1")
, order = T)

#Cluster 1 = CD4 T cells
FeaturePlot(HuLu,c("CD53", "PTPRC", "CORO1A", "ISG20", "CCL5", "CD40LG", "TNFRSF25",
"CD28", "CD4", "TRAT1")
, order = T)

#Cluster 2 = CD8 T cells
FeaturePlot(HuLu,c("CD53", "CD53", "PTPRC", "CORO1A", "ISG20", "CCL5", "CD8A",
"CD8B", "TRGC2")
, order = T)

#Cluster 3 = CD8 T cells
FeaturePlot(HuLu,c("CD53", "CD53", "PTPRC", "CORO1A", "ISG20", "CCL5", "CD8A",
"CD8B", "TRGC2")
, order = T)

#Cluster 4 = T cells with high mitochondrial reads
FeaturePlot(HuLu, c('TRAC','percent.mt'), order = T)

#Cluster 5 = NK cells
FeaturePlot(HuLu,c("CD53", "PTPRC", "CORO1A", "NCAM1", "CCL5", "GNLY", "SPON2",
"FCGR3A")
, order = T)

#Cluster 6 = Secretory Epithelium
FeaturePlot(HuLu, c(
'KRT8','SCGB1A1','EPCAM','MUC1',
"FXYD3", "ELF3", "IGFBP2", "SERPINF1", "TSPAN1"), order = T)

#Cluster 7 = Monocyte
FeaturePlot(HuLu, c("CD53", "PTPRC", "CORO1A", "FCER1G", "C1orf162", "CLEC7A",
"S100A12", "FCN1", "CD14"), order = T)

#Cluster 8 = Mast cells
FeaturePlot(HuLu, c("MS4A2", "SLC18A2", "RGS13"), order = T)

#Cluster 9 = B cells
FeaturePlot(HuLu, c("MS4A1"), order = T)

#Cluster 10 = Macrophages
FeaturePlot(HuLu, c("CYP27A1", "MARCO", "FABP4"), order = T)

#Cluster 11 = Endothelial cells
FeaturePlot(HuLu, c("CLDN5", "ECSCR", "CLEC14A", "CCL21", "TFF3", "MMRN1", "LYVE1",
"CCL21"), order = T)

#Cluster 12 = NK cells
FeaturePlot(HuLu,c("CD53", "PTPRC", "CORO1A", "ISG20", "CCL5", "GNLY", "SPON2",
"FCGR3A"))

#Cluster 13 = CD4 T cells
FeaturePlot(HuLu,c("CD53", "PTPRC", "CORO1A", "ISG20", "CCL5", "CD40LG", "TNFRSF25",
"CD28", "CD4", "TRAT1"))

#Cluster 14 = NK cells
FeaturePlot(HuLu,c("CD53", "PTPRC", "CORO1A", "ISG20", "CCL5", "GNLY", "SPON2",
"FCGR3A")
, order = T)

#Cluster 15 = Multiciliated
FeaturePlot(HuLu, c("RSPH1", "C9orf24", "C20orf85"), order = T)

Temp <- scan(text = "
CYP27A1
MARCO
FABP4
",
what= "")
dput(Temp)
## c("CYP27A1", "MARCO", "FABP4")
new.cluster.ids <- c('CD4_T_cells', 'CD4_T_cells', 'CD8_T_cells',
'CD8_T_cells','T_cell_mito_high', "NK_cells", "Epithelial",
"Monocytes", 'Mast_cells', 'B_cells', 'Macrophages',
'Endothelial_cells', "NK_cells" , 'CD4_T_cells',
"NK_cells" , 'Multiciliated')
names(x = new.cluster.ids) <- levels(HuLu)
HuLu <- RenameIdents(object = HuLu, new.cluster.ids)
HuLu[["Cell.ID"]] <- Idents(object = HuLu)
Idents(HuLu)<-'Cell.ID'
DimPlot(HuLu, label = T)
